rm(list = ls()[ls()!="drop_attn_fails"])
library(tidyverse)
library(readr)
library(psych)
library(ggcorrplot)
library(survey)
library(cregg)
library(tidycat)
library(broom)
library(anesrake)

#drop_attn_fails <- TRUE

load(file = "../working/survey_data_for_quota_checks.Rdata")

sub_rep_names <- c("sub_rep_1_1", "sub_rep_1_2", "sub_rep_1_3", "sub_rep_1_4")
surr_rep_names <- c("surr_rep_1_1", "surr_rep_1_2", "surr_rep_1_3")
desc_rep_names <- c("desc_rep_1_1", "desc_rep_1_2", "desc_rep_1_3", "desc_rep_1_4", "desc_rep_1_5")
just_rep_names <- c("just_rep1_1", "just_rep1_2", "just_rep1_3", "just_rep1_4")
pers_rep_names <- c("pers_rep1_1", "pers_rep1_2", "pers_rep1_3", "pers_rep1_4")
resp_rep_names <- c("respons_rep1_1", "respons_rep1_2", "respons_rep1_3")

all_names <- c(sub_rep_names, surr_rep_names, desc_rep_names, just_rep_names, pers_rep_names, resp_rep_names)

if(drop_attn_fails){

    # Drop attention check failures
  
  uk <- uk %>% filter(pers_rep1_5 == "Agree") 
  us <- us %>% filter(pers_rep1_5 == "Agree") 
  de <- de %>% filter(pers_rep1_5 == "Stimme zu") 

  attn <- "_attn"
    
} else{
  
  uk$wgt <- 1
  de$wgt <- 1
  us$wgt <- 1
  
  attn <- "_full"
  
}


## ###########
## Factor analysis of dimensions

# Cronbach alphas

uk_comparable <- uk %>% select(c(paste0(all_names,"_num"), "wgt"))
us_comparable <- us %>% select(c(paste0(all_names,"_num"), "wgt"))
de_comparable <- de %>% select(c(paste0(all_names,"_num"), "wgt"))

all_countries <- rbind(uk_comparable, us_comparable, de_comparable)

get_alphas <- function(country_data){
 
  ca <- country_data %>% summarise(substantive = as.numeric(alpha(polychoric(select(., paste0(sub_rep_names,"_num")), weight = wgt)$rho, check.keys = TRUE)$total[1]),
            descriptive = as.numeric(alpha(polychoric(select(., paste0(desc_rep_names,"_num")), weight = wgt)$rho, check.keys = TRUE)$total[1]),
            justification = as.numeric(alpha(polychoric(select(., paste0(just_rep_names,"_num")), weight = wgt)$rho, check.keys = TRUE)$total[1]),
            personalisation = as.numeric(alpha(polychoric(select(., paste0(pers_rep_names,"_num")), weight = wgt)$rho, check.keys = TRUE)$total[1]),
            responsiveness = as.numeric(alpha(polychoric(select(., paste0(resp_rep_names,"_num")), weight = wgt)$rho, check.keys = TRUE)$total[1]),
            surrogation = as.numeric(alpha(polychoric(select(., paste0(surr_rep_names,"_num")), weight = wgt)$rho, check.keys = TRUE)$total[1]))
  return(ca)
  
   
}

ca_us <- get_alphas(us)
ca_uk <- get_alphas(uk)
ca_de <- get_alphas(de)
ca_all <- get_alphas(all_countries)
  
cronbach <- data.frame(t(ca_uk), t(ca_us), t(ca_de), t(ca_all))
names(cronbach) <- c("UK", "US", "DE", "Combined")
cronbach <- round(cronbach, 2)
cronbach

write.csv(cronbach, file = paste0("../output/tables/cronbach",attn,".csv"))

# Factor loadings by country

get_factor_loadings <- function(country_data, return_table = FALSE){
  
  joint_factor <- country_data %>% 
    select(paste0(all_names,"_num")) %>% 
    fa(nfactors = 6, rotate = "oblimin", fm = "ml",
       weight = country_data$wgt) 
  
  joint_factor_tmp <- joint_factor$loadings %>% 
    as.matrix.data.frame() %>% as_tibble() %>%
    mutate(var = all_names) 
  
  if(return_table){
    
    names(joint_factor_tmp)[-7] <- paste0(gsub("V","Factor ",names(joint_factor_tmp)))[-7]
      
    eigenvs <- tibble(Item = c("Eigenvalue"),
                      `Factor 1` = joint_factor$e.values[1],
                      `Factor 2` = joint_factor$e.values[2],
                      `Factor 3` = joint_factor$e.values[3],
                      `Factor 4` = joint_factor$e.values[4],
                      `Factor 5` = joint_factor$e.values[5],
                      `Factor 6` = joint_factor$e.values[6])
    joint_factor_tmp %>%
      mutate(var = gsub("sub_rep_1_", "Substantive ", var),
             var = gsub("surr_rep_1_", "Surrogation ", var),
             var = gsub("respons_rep1_", "Responsiveness ", var),
             var = gsub("pers_rep1_", "Personalization ", var),
             var = gsub("just_rep1_", "Justification ", var),
             var = gsub("desc_rep_1_", "Descriptive ", var)) %>%
      mutate(var = factor(var, levels = rev(c(paste0("Descriptive ", 1:6),
                                              paste0("Substantive ", 1:6),
                                              paste0("Justification ", 1:6),
                                              paste0("Surrogation ", 1:6),
                                              paste0("Personalization ", 1:6),
                                              paste0("Responsiveness ", 1:6))))) %>%
      rename(Item = var) %>%
      select(7,1:6) %>%
      bind_rows(eigenvs) %>%
      kableExtra::kable(booktabs = TRUE, format = "latex", digits = 2, linesep = "") %>%
      kableExtra::row_spec(23, hline_after = T) %>%
      print() 
    
  }
  
  if(!return_table){
    
  plot_out <- joint_factor_tmp %>%
    pivot_longer(cols = (starts_with("V", ignore.case = F))) %>%
    mutate(name = paste0(gsub("V","Factor ",name),"; Eigenvalue = ", round(joint_factor$Vaccounted[1,as.numeric(gsub("V","",name))],2))) %>%
    mutate(var = gsub("sub_rep_1_", "Substantive ", var),
           var = gsub("surr_rep_1_", "Surrogation ", var),
           var = gsub("respons_rep1_", "Responsiveness ", var),
           var = gsub("pers_rep1_", "Personalization ", var),
           var = gsub("just_rep1_", "Justification ", var),
           var = gsub("desc_rep_1_", "Descriptive ", var)) %>%
    mutate(var = factor(var, levels = rev(c(paste0("Descriptive ", 1:6),
                                            paste0("Substantive ", 1:6),
                                            paste0("Justification ", 1:6),
                                            paste0("Surrogation ", 1:6),
                                            paste0("Personalization ", 1:6),
                                            paste0("Responsiveness ", 1:6))))) %>%
    ggplot(aes(x = abs(value), y = var, group = name, fill = value)) + 
    geom_bar(stat = "identity") + 
    facet_wrap(~name) + xlab("abs(Loading)") + ylab("") + 
    theme_bw() +  
    scale_fill_gradient2(name = "Loading", 
                         high = "black", mid = "white", low = "black", 
                         midpoint=0, guide="none") +
    expand_limits(x = c(0,1)) + 
    theme(strip.background = element_blank(), strip.placement = "outside",
          legend.position = "bottom")
  
  
  return(plot_out)
  }
  
}

uk_joint_loadings <- get_factor_loadings(uk)
us_joint_loadings <- get_factor_loadings(us)
de_joint_loadings <- get_factor_loadings(de)
combined_joint_loadings <- get_factor_loadings(all_countries)

pdf(paste0("../output/plots/joint_loadings/uk_joint_loadings",attn,".pdf"),10.5,7)
print(uk_joint_loadings)
dev.off()

sink(paste0("../output/tables/joint_loadings/uk_joint_loadings",attn,".tex"))
get_factor_loadings(uk, return_table = TRUE)
sink()

pdf(paste0("../output/plots/joint_loadings/de_joint_loadings",attn,".pdf"),10.5,7)
print(de_joint_loadings)
dev.off()

sink(paste0("../output/tables/joint_loadings/de_joint_loadings",attn,".tex"))
get_factor_loadings(de, return_table = TRUE)
sink()

pdf(paste0("../output/plots/joint_loadings/us_joint_loadings",attn,".pdf"),10.5,7)
print(us_joint_loadings)
dev.off()

sink(paste0("../output/tables/joint_loadings/us_joint_loadings",attn,".tex"))
get_factor_loadings(us, return_table = TRUE)
sink()

pdf(paste0("../output/plots/joint_loadings/combined_joint_loadings",attn,".pdf"),10.5,7)
print(combined_joint_loadings)
dev.off()

sink(paste0("../output/tables/joint_loadings/combined_joint_loadings",attn,".tex"))
get_factor_loadings(all_countries, return_table = TRUE)
sink()

## Loading for separate models

var_names_list <- list(pers_rep_names = pers_rep_names,
                       desc_rep_names = desc_rep_names,
                       sub_rep_names = sub_rep_names,
                       surr_rep_names = surr_rep_names,
                       just_rep_names = just_rep_names,
                       resp_rep_names = resp_rep_names)

dimension_names <- c("Personalization", "Descriptive", "Substantive", "Surrogation", "Justification", "Responsiveness")

get_loadings <- function(data = uk){
  
  out <- list()
  i<-1
  for(i in 1:length(dimension_names)){
    
    loadings_out <- data %>% 
      select(paste0(var_names_list[[i]],"_num")) %>% 
      fa(nfactors = 1, rotate = "oblimin", fm = "ml", max.iter = 500,
         weight = data$wgt) 
    
    out[[i]] <- loadings_out$loadings %>% 
      as.matrix.data.frame() %>% as_tibble() %>%
      mutate(var = var_names_list[[i]]) %>%
      pivot_longer(cols = (starts_with("V", ignore.case = F))) %>%
      mutate(name = gsub("V","Factor ",name),
             dimension = dimension_names[i])  
    
    out[[i]] <- out[[i]] %>% 
      ggplot(aes(x = abs(value), y = var, group = dimension, fill = value)) + 
      geom_bar(stat = "identity") + 
      facet_wrap(~dimension) + xlab("abs(Loading)") + ylab("") + theme_bw() +  
      scale_fill_gradient2(name = "Loading",
                           high = "blue", mid = "white", low = "red",
                           midpoint = 0, guide = "none") +
      expand_limits(x = c(0,1))
    
  } 

  grid.newpage()
  grid.draw(cbind(rbind(ggplotGrob(out[[1]]), ggplotGrob(out[[2]]), ggplotGrob(out[[3]])),
                  rbind(ggplotGrob(out[[4]]), ggplotGrob(out[[5]]), ggplotGrob(out[[6]])), size = "last"))
  
  }

library(grid)

pdf(paste0("../output/plots/individual_loadings/combined_individual_loadings",attn,".pdf"),12,8)
get_loadings(data = all_countries)
dev.off()

pdf(paste0("../output/plots/individual_loadings/uk_individual_loadings",attn,".pdf"),12,8)
uk_loadings <- get_loadings(data = uk)
dev.off()

pdf(paste0("../output/plots/individual_loadings/us_individual_loadings",attn,".pdf"),12,8)
us_loadings <- get_loadings(data = us)
dev.off()

pdf(paste0("../output/plots/individual_loadings/de_individual_loadings",attn,".pdf"),12,8)
de_loadings <- get_loadings(data = de)
dev.off()

## ###########
## Factor analysis of dimensions

fa_func <- function(x, wgt) {
  
  out <- as.numeric(fa(x, nfactors = 1, fm = "ml", max.iter = 500, cor = "poly", weight = wgt)$scores)
  
  return(out)
}

uk <- uk %>% 
  mutate(across(all_of(all_names), ~as.numeric(factor(.x,levels = c("Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree"))), .names = "{.col}_num")) %>%
  # Apply factor analysis function
  mutate(sub_fa = fa_func(select(.,paste0(sub_rep_names,"_num")), wgt = wgt),
         surr_fa = fa_func(select(.,paste0(surr_rep_names,"_num")), wgt = wgt),
         desc_fa = fa_func(select(.,paste0(desc_rep_names,"_num")), wgt = wgt),
         just_fa = fa_func(select(.,paste0(just_rep_names,"_num")), wgt = wgt),
         pers_fa = fa_func(select(.,paste0(pers_rep_names,"_num")), wgt = wgt),
         resp_fa = fa_func(select(.,paste0(resp_rep_names,"_num")), wgt = wgt)
  ) 

de <- de %>% 
  # Recode batteries to numeric
  mutate(across(all_of(all_names), ~as.numeric(factor(.x,levels = c("Stimme überhaupt nicht zu", "Stimme nicht zu", "Stimme weder zu noch nicht zu", "Stimme zu", "Stimme voll und ganz zu"))), .names = "{.col}_num")) %>%
  # Apply factor analysis function
  mutate(sub_fa = fa_func(select(.,paste0(sub_rep_names,"_num")), wgt = wgt),
         surr_fa = fa_func(select(.,paste0(surr_rep_names,"_num")), wgt = wgt),
         desc_fa = fa_func(select(.,paste0(desc_rep_names,"_num")), wgt = wgt),
         just_fa = fa_func(select(.,paste0(just_rep_names,"_num")), wgt = wgt),
         pers_fa = fa_func(select(.,paste0(pers_rep_names,"_num")), wgt = wgt),
         resp_fa = fa_func(select(.,paste0(resp_rep_names,"_num")), wgt = wgt)
  ) 

us <- us %>% 
  # Recode batteries to numeric
  mutate(across(all_of(all_names), ~as.numeric(factor(.x,levels = c("Strongly disagree", "Disagree", "Neither agree nor disagree", "Agree", "Strongly agree"))), .names = "{.col}_num")) %>%
  # Apply factor analysis function
  mutate(sub_fa = fa_func(select(.,paste0(sub_rep_names,"_num")), wgt = wgt),
         surr_fa = fa_func(select(.,paste0(surr_rep_names,"_num")), wgt = wgt),
         desc_fa = fa_func(select(.,paste0(desc_rep_names,"_num")), wgt = wgt),
         just_fa = fa_func(select(.,paste0(just_rep_names,"_num")), wgt = wgt),
         pers_fa = fa_func(select(.,paste0(pers_rep_names,"_num")), wgt = wgt),
         resp_fa = fa_func(select(.,paste0(resp_rep_names,"_num")), wgt = wgt)
  )


## Rotate factor scores to have certain polarities

if(cor(uk$sub_fa, uk$sub_rep_1_1_num, use = "complete.obs") < 0) uk$sub_fa <- uk$sub_fa*-1 # Higher means wants more substantive representation
if(cor(uk$surr_fa, uk$surr_rep_1_1_num, use = "complete.obs") > 0) uk$surr_fa <- uk$surr_fa*-1 # Higher means wants (or will tolerate) more surrogate representation
if(cor(uk$desc_fa, uk$desc_rep_1_1_num, use = "complete.obs") < 0) uk$desc_fa <- uk$desc_fa*-1 # Higher means wants more descriptive representation
if(cor(uk$just_fa, uk$just_rep1_1_num, use = "complete.obs") < 0) uk$just_fa <- uk$just_fa*-1 # Higher means wants politicians to appeal to society as a whole
if(cor(uk$pers_fa, uk$pers_rep1_4_num, use = "complete.obs") < 0) uk$pers_fa <- uk$pers_fa*-1 # Higher means wants more personalization
if(cor(uk$resp_fa, uk$respons_rep1_2_num, use = "complete.obs") < 0) uk$resp_fa <- uk$resp_fa*-1 # Higher means wants more responsiveness

if(cor(us$sub_fa, us$sub_rep_1_1_num, use = "complete.obs") < 0) us$sub_fa <- us$sub_fa*-1 # Higher means wants more substantive representation
if(cor(us$surr_fa, us$surr_rep_1_1_num, use = "complete.obs") > 0) us$surr_fa <- us$surr_fa*-1 # Higher means wants (or will tolerate) more surrogate representation
if(cor(us$desc_fa, us$desc_rep_1_1_num, use = "complete.obs") < 0) us$desc_fa <- us$desc_fa*-1 # Higher means wants more descriptive representation
if(cor(us$just_fa, us$just_rep1_1_num, use = "complete.obs") < 0) us$just_fa <- us$just_fa*-1 # Higher means wants politicians to appeal to society as a whole
if(cor(us$pers_fa, us$pers_rep1_4_num, use = "complete.obs") < 0) us$pers_fa <- us$pers_fa*-1 # Higher means wants more personalization
if(cor(us$resp_fa, us$respons_rep1_2_num, use = "complete.obs") < 0) us$resp_fa <- us$resp_fa*-1 # Higher means wants more responsiveness

if(cor(de$sub_fa, de$sub_rep_1_1_num, use = "complete.obs") < 0) de$sub_fa <- de$sub_fa*-1 # Higher means wants more substantive representation
if(cor(de$surr_fa, de$surr_rep_1_1_num, use = "complete.obs") > 0) de$surr_fa <- de$surr_fa*-1 # Higher means wants (or will tolerate) more surrogate representation
if(cor(de$desc_fa, de$desc_rep_1_1_num, use = "complete.obs") < 0) de$desc_fa <- de$desc_fa*-1 # Higher means wants more descriptive representation
if(cor(de$just_fa, de$just_rep1_1_num, use = "complete.obs") < 0) de$just_fa <- de$just_fa*-1 # Higher means wants politicians to appeal to society as a whole
if(cor(de$pers_fa, de$pers_rep1_4_num, use = "complete.obs") < 0) de$pers_fa <- de$pers_fa*-1 # Higher means wants more personalization
if(cor(de$resp_fa, de$respons_rep1_2_num, use = "complete.obs") < 0) de$resp_fa <- de$resp_fa*-1 # Higher means wants more responsiveness


## ###########
## Check polarities of dimensions

cor(uk$sub_fa, uk$sub_rep_1_1_num, use = "complete.obs") # Should be positive
cor(uk$desc_fa, uk$desc_rep_1_1_num, use = "complete.obs") # Should be positive
cor(uk$surr_fa, uk$surr_rep_1_1_num, use = "complete.obs") # Should be negative
cor(uk$just_fa, uk$just_rep1_1_num, use = "complete.obs") # Should be positive
cor(uk$pers_fa, uk$pers_rep1_1_num, use = "complete.obs") # Should be negative
cor(uk$resp_fa, uk$respons_rep1_1_num, use = "complete.obs") # Should be negative

cor(de$sub_fa, de$sub_rep_1_1_num, use = "complete.obs") # Should be positive
cor(de$desc_fa, de$desc_rep_1_1_num, use = "complete.obs") # Should be positive
cor(de$surr_fa, de$surr_rep_1_1_num, use = "complete.obs") # Should be negative
cor(de$just_fa, de$just_rep1_1_num, use = "complete.obs") # Should be positive
cor(de$pers_fa, de$pers_rep1_1_num, use = "complete.obs") # Should be negative
cor(de$resp_fa, de$respons_rep1_1_num, use = "complete.obs") # Should be negative

cor(us$sub_fa, us$sub_rep_1_1_num, use = "complete.obs") # Should be positive
cor(us$desc_fa, us$desc_rep_1_1_num, use = "complete.obs") # Should be positive
cor(us$surr_fa, us$surr_rep_1_1_num, use = "complete.obs") # Should be negative
cor(us$just_fa, us$just_rep1_1_num, use = "complete.obs") # Should be positive
cor(us$pers_fa, us$pers_rep1_1_num, use = "complete.obs") # Should be negative
cor(us$resp_fa, us$respons_rep1_1_num, use = "complete.obs") # Should be negative



dvs <- c("sub_fa", "desc_fa", "surr_fa", "pers_fa", "resp_fa", "just_fa")

## Create new education variable that combines c("Abschluss der polytechnischen Oberschule") and c("Mittlere Reife, Realschulabschluss, Fachschulreife oder gleichwertiger Abschluss")

de <- de %>%
  mutate(educ2 = factor(ifelse(educ == "Abschluss", "Mittlere Reife", as.character(educ)), 
                        levels = c("Hauptschul", "Noch in schulischer", "Mittlere Reife", "Studienabschluss")))

# data = uk
# form = uk_form
# baseline_text = baseline_text_uk
# country = "uk"

plot_coefs <- function(data = de, form = de_form, baseline_text, country){

  data$political_trust <- as.numeric(scale(data$political_trust))
  data$general_ideology <- as.numeric(scale(as.numeric(data$general_ideology)))
  data$satisf_dem <- as.numeric(scale(as.numeric(factor(data$satisf_dem))))
  
  tmp <- do.call("rbind",lapply(dvs, function(dv) {
    m <- lm(as.formula(paste0(dv, form)), data, weights = wgt)
    x <- broom::tidy(m, conf.int = TRUE)
    x <- tidy_categorical(x, m)
    x$dv <- dv
    x
    }
    )) %>% 
    mutate(low = estimate - 1.96 * std.error,
           high = estimate + 1.96 * std.error,
           dv = case_when(dv == "sub_fa" ~ "Substantive",
                          dv == "desc_fa" ~ "Descriptive",
                          dv == "surr_fa" ~ "Surrogation",
                          dv == "just_fa" ~ "Justification",
                          dv == "pers_fa" ~ "Personalization",
                          dv == "resp_fa" ~ "Responsiveness")) %>%
    mutate(dv = factor(dv, levels = c("Substantive","Descriptive","Surrogation", "Justification", "Personalization", "Responsiveness"))) %>%
  filter(term != "(Intercept)") %>%
  filter(!is.na(term)) %>%
  arrange(estimate) %>%
    mutate(level = fct_recode(level,"Trust\n(1-7 scale)" = "political_trust"),
           level = fct_recode(level,"Dem. Satisfaction\n(1-7 scale)" = "satisf_dem"),
           variable = gsub("age_cat|age_cat_b", "Age", variable),
           variable = gsub("political_trust", "Trust", variable),
           variable = gsub("ethnicity", "Ethnicity", variable),
           variable = gsub("RetroVote", "Past Vote", variable),
           variable = gsub("general_ideology", "Ideology", variable),
           variable = gsub("political_interest", "Interest", variable),
           variable = gsub("satisf_dem", "Dem. Satis", variable),
           variable = gsub("educ|educ2", "Education", variable))
  
  if(country == "us") {
    tmp <- tmp %>% mutate(level = fct_recode(level, "Ideology\n(Very Lib. 1-5 Very Con.)" = "general_ideology"))
  } else {
    tmp <- tmp %>% mutate(level = fct_recode(level, "Ideology\n(Left 0-10 Right)" = "general_ideology"))
  }
  
  
  if(country == "de"){
    tmp <- tmp %>% mutate(level = fct_recode(level, "Upper class" = "Oberschicht"),
                          level = fct_recode(level, "Middle class" = "Mittelschicht"),
                          level = fct_recode(level, "University degree" = "Studienabschluss"),
                          level = fct_recode(level, "Abitur/Still in school" = "Noch in schulischer"),
                          level = fct_recode(level, "Intermediate secondary school certificate" = "Mittlere Reife"),
                          #level = fct_recode(level, "Apprenticeship" = "Hauptschul"),
                          level = fct_recode(level, "Very interested" = "Sehr interessiert"),
                          level = fct_recode(level, "Somewhat interested" = "Etwas interessiert"),
                          level = fct_recode(level, "Not very interested" = "Nicht sehr interessiert"),
                          level = fct_recode(level, "Female" = "Weiblich"),
                          level = fct_relevel(level, "Abitur/Still in school", after=26))    
  }
  
  all_vars <- unique(tmp$variable)
  all_vars <- all_vars[!all_vars%in%c("Ideology", "Trust", "Dem. Satis")]
  baselines <- lapply(all_vars, function(d) {
    baselines <- tmp[tmp$variable==d,]
    
    baselines$level <- baseline_text[d]
    baselines$estimate <- 0
    baselines$low <- 0
    baselines$high <- 0  
    baselines
  })
  
  tmp <- bind_rows(bind_rows(baselines), tmp)
  
  # Reorder US levels  
  if(country == "us"){
    tmp <- tmp %>% mutate(level = fct_relevel(level, c("(Base: Lower class)", "Working class", "Middle class", "Upper class", 
                                                               "(Base: None/Grade 11)","High school graduate","Some college, no degree", "College degree", "Master's degree and above"))
    )
  }
  
  # Reorder DE levels  
  if(country == "de"){
    tmp <- tmp %>% mutate(level = fct_relevel(level, 
                                              c("(Base: Secondary school certificate or less)",
                                                "Intermediate secondary school certificate",
                                                "Abitur/Still in school", "University degree"))
    )
  }
  
  sink(paste0("../output/tables/factor_descriptives_",country,attn,".tex"))
  table_out <- tmp %>%
    select(variable, dv, level, estimate, std.error) %>%
    arrange(variable, level)  %>%
    mutate(est = paste0(round(estimate,2), "\n (", round(std.error,2),")")) %>%
    filter(!grepl("Base:", level)) %>%
    select(-estimate,-std.error) %>%
    pivot_wider(names_from = "dv",
                values_from = "est",
                id_cols = c("variable", "level")) %>%
    rename(Level = level,
           Variable = variable) %>%
    select(Variable, Level, Descriptive, Substantive, Personalization, Surrogation, Responsiveness, Justification) %>%
    kableExtra::kable(format = "latex", booktabs = TRUE, escape = TRUE, align = "lllllll", linesep = "", caption = paste0("Correlates of factor scores: ", toupper(country) ," sample, multiple regression models"), label = paste0("factor_scores_", country,"_multi",attn)) %>%
    kableExtra::kable_styling(font_size = 10) %>%
    kableExtra::add_footnote(paste0("Sample size: ",nrow(data),"; The dependent variable in each model is the factor scores for the relevant dimension as estimated from the item batteries we describe in the main body of the paper. Standard errors in parentheses."))
  cat(table_out)
  
  sink()
  
  out <- list()
  out$plot <- tmp %>%
    mutate(dv = factor(dv, levels = c("Substantive", "Descriptive", "Surrogation", "Justification", "Personalization", "Responsiveness"))) %>%
    mutate(variable = factor(variable, levels = c("Age", "Class", "Education", "Ethnicity", "Gender", "Ideology", "Interest", "Past Vote", "Trust", "Dem. Satis"))) %>%
    mutate(p.value.new = p.adjust(p.value, "BH")) %>%
    mutate(sig = p.value.new <= 0.05,
           sig = ifelse(estimate==0,FALSE, sig),
           sig = ifelse(sig, "<= 0.05", "> 0.05")) %>%
  ggplot(aes(x = estimate, xmin = low, xmax = high, y = level, col = sig)) +   
  geom_pointrange(size = .1) + 
  geom_point() + theme_bw() + xlab("Estimate") + ylab("") + 
  geom_vline(xintercept = 0, linetype = 2) + facet_grid(variable ~ dv, scales = "free_y") + 
    theme(strip.background = element_blank(), strip.placement = "outside")  + 
    scale_color_manual("Adjusted p-value",values = c("black","gray")) +
    theme(legend.position = "bottom") + 
    guides(colour = guide_legend(title.position="top", title.hjust = 0.5)) 
  
  out$data <- tmp

  return(out)
  
  #return(tmp)
  
  }


uk_form <- "~ Gender + age_cat + Class + RetroVote + general_ideology + political_trust + political_interest + satisf_dem + ethnicity + educ"
us_form <- "~ Gender + age_cat + Class + RetroVote + general_ideology + political_trust + political_interest + satisf_dem + ethnicity + educ"
de_form <- "~ Gender + age_cat_b + Class + RetroVote + general_ideology + political_trust + political_interest + satisf_dem + ethnicity + educ2"

baseline_text_uk <- c("Age" = "(Base: 18-24)",
                   "Education" = "(Base: No qual)",
                   "Ethnicity" = "(Base: White)",
                   "Past Vote" = "(Base: Conservative)",
                   "Class" = "(Base: Working class)",
                   "Gender" = "(Base: Male)",
                   "Interest" = "(Base: Not at all interested)")

baseline_text_us <- c("Age" = "(Base: 18-24)",
                      "Education" = "(Base: None/Grade 11)",
                      "Ethnicity" = "(Base: White)",
                      "Past Vote" = "(Base: Democrat)",
                      "Class" = "(Base: Lower class)",
                      "Gender" = "(Base: Male)",
                      "Interest" = "(Base: Not at all interested)")

baseline_text_de <- c("Age" = "(Base: 18-24)",
                      "Education" = "(Base: Secondary school certificate or less)",
                      "Ethnicity" = "(Base: No migrant background)",
                      "Past Vote" = "(Base: CDU/CSU)",
                      "Class" = "(Base: Working class)",
                      "Gender" = "(Base: Male)",
                      "Interest" = "(Base: Not at all interested)")

uk_mult <- plot_coefs(uk, uk_form, baseline_text_uk, country = "uk")
uk_mult$plot + ggtitle("UK - multivariate") + xlim(c(-1.75, 1.75))
ggsave(paste0("../output/plots/factor_analysis_descriptive/uk_multivariate",attn,"_BH.pdf"), width = 15, height = 9)
uk_mult$plot + aes(col=NULL) + ggtitle("UK - multivariate") + xlim(c(-1.75, 1.75))
ggsave(paste0("../output/plots/factor_analysis_descriptive/uk_multivariate",attn,".pdf"), width = 15, height = 9)

us_mult <- plot_coefs(data = us, form = us_form, baseline_text_us, "us") 
us_mult$plot + ggtitle("US - multivariate") + xlim(c(-1.75, 1.75))
ggsave(paste0("../output/plots/factor_analysis_descriptive/us_multivariate",attn,"_BH.pdf"), width = 15, height = 9)
us_mult$plot + aes(col = NULL) + ggtitle("US - multivariate") + xlim(c(-1.75, 1.75))
ggsave(paste0("../output/plots/factor_analysis_descriptive/us_multivariate",attn,".pdf"), width = 15, height = 9)

de_mult <- plot_coefs(data = de, form = de_form, baseline_text_de, "de") 
de_mult$plot + ggtitle("DE - multivariate") + xlim(c(-1.75, 1.75))
ggsave(paste0("../output/plots/factor_analysis_descriptive/de_multivariate",attn,"_BH.pdf"), width = 15, height = 9)
de_mult$plot + aes(col = NULL) + ggtitle("DE - multivariate") + xlim(c(-1.75, 1.75))
ggsave(paste0("../output/plots/factor_analysis_descriptive/de_multivariate",attn,".pdf"), width = 15, height = 9)

# data = uk
# form = uk_form
# baseline_text = baseline_text_uk
# country = "uk"
plot_coefs_bivariate <- function(data = uk, form = uk_form, baseline_text, country){
  
  vars <- gsub("~ ","",trimws(strsplit(form, "\\+")[[1]]))
  
  data$political_trust <- as.numeric(scale(data$political_trust))
  data$general_ideology <- as.numeric(scale(as.numeric(data$general_ideology)))
  data$satisf_dem <- as.numeric(scale(as.numeric(factor(data$satisf_dem))))
  
  for_plot <- do.call("rbind", 
          lapply(dvs, function(dv) {
    
    y <- do.call("rbind", lapply(vars, function(var){
      x <- broom::tidy(lm(as.formula(paste0(dv, "~", var)), data, weights = wgt))  
      x$dv <- dv
      x$iv <- var
      x
    }
    )
    )
    
    y
    
  }
  )) %>% 
    mutate(low = estimate - 1.96 * std.error,
           high = estimate + 1.96 * std.error,
           dv = case_when(dv == "sub_fa" ~ "Substantive",
                          dv == "desc_fa" ~ "Descriptive",
                          dv == "surr_fa" ~ "Surrogation",
                          dv == "just_fa" ~ "Justification",
                          dv == "pers_fa" ~ "Personalization",
                          dv == "resp_fa" ~ "Responsiveness"),
           iv = ifelse(iv == "general_ideology", "Ideology", iv),
           iv = ifelse(iv == "political_trust", "Trust", iv),
           iv = ifelse(iv == "political_interest", "Interest", iv),
           #iv = ifelse(iv == "RetroVote", "Retro. vote", iv),
           iv = ifelse(iv %in% c("age_cat","age_cat_b"), "Age", iv),
           iv = ifelse(iv == "ethnicity", "Ethnicity", iv),
           iv = ifelse(iv == "RetroVote", "Past Vote", iv),
           iv = ifelse(iv == "educ", "Education", iv),
           iv = ifelse(iv == "educ2", "Education", iv),
           iv = ifelse(iv == "satisf_dem", "Dem. Satis", iv),
           term = ifelse(term == "general_ideology", "Ideology", term),
           term = ifelse(term == "political_trust", "Trust", term),
           term = ifelse(term == "satisf_dem", "Dem. Satis", term),
           term = gsub("Gender|age_cat|age_cat_b|Class|political_interest|RetroVote|general_ideology|ethnicity|educ|educ2","", term)) %>%
    filter(term != "(Intercept)") %>%
    mutate(term = factor(term, levels = unique(term))) %>%
    mutate(term = fct_recode(term,"Trust\n(1-7 scale)" = "Trust"),
           term = fct_recode(term,"Dem. Satisfaction\n(1-7 scale)" = "Dem. Satis")) %>%
    arrange(estimate) 
    
  if(country == "us") {
    for_plot <- for_plot %>% mutate(term = fct_recode(term, "Ideology\n(Very Lib. 1-5 Very Con.)" = "Ideology"))
  } else {
    for_plot <- for_plot %>% mutate(term = fct_recode(term, "Ideology\n(Left 0-10 Right)" = "Ideology"))
  }
  
  if(country == "de"){
    for_plot <- for_plot %>% mutate(term = fct_recode(term, "Upper class" = "Oberschicht"),
                          term = fct_recode(term, "Middle class" = "Mittelschicht"),
                          term = fct_recode(term, "University degree" = "Studienabschluss"),
                          term = fct_recode(term, "Abitur/Still in school" = "Noch in schulischer"),
                          term = fct_recode(term, "Intermediate secondary school certificate" = "Mittlere Reife"),
                          #term = fct_recode(term, "Apprenticeship" = "Hauptschul"),
                          term = fct_recode(term, "Very interested" = "Sehr interessiert"),
                          term = fct_recode(term, "Somewhat interested" = "Etwas interessiert"),
                          term = fct_recode(term, "Not very interested" = "Nicht sehr interessiert"),
                          term = fct_recode(term, "Female" = "Weiblich"))    
  }
  
  all_vars <- unique(for_plot$iv)
  all_vars <- all_vars[!all_vars%in%c("Ideology", "Trust", "Dem. Satis")]
  baselines <- lapply(all_vars, function(d) {
    baselines <- for_plot[for_plot$iv==d,]
    
    baselines$term <- baseline_text[d]
    baselines$estimate <- 0
    baselines$low <- 0
    baselines$high <- 0  
    baselines
  })
  
  

  for_plot <- bind_rows(bind_rows(baselines), for_plot)

  # Reorder US levels  
  if(country == "us"){
    for_plot <- for_plot %>% mutate(term = fct_relevel(term, c("(Base: Lower class)", "Working class", "Middle class", "Upper class", 
                                                 "(Base: None/Grade 11)","High school graduate","Some college, no degree", "College degree", "Master's degree and above"))
  )
  }
  
  # Reorder DE levels  
  if(country == "de"){
    for_plot <- for_plot %>% mutate(term = fct_relevel(term, 
                                              c("(Base: Secondary school certificate or less)",
                                                "Intermediate secondary school certificate",
                                                "Abitur/Still in school", "University degree"))
    )
  }
  
  sink(paste0("../output/tables/factor_descriptives_bivariate_",country,attn,".tex"))
  table_out <- for_plot %>%
    select(iv, dv, term, estimate, std.error) %>%
    arrange(iv, term)  %>%
    mutate(est = paste0(round(estimate,2), "\n (", round(std.error,2),")")) %>%
    filter(!grepl("Base:", term)) %>%
    select(-estimate,-std.error) %>%
    pivot_wider(names_from = "dv",
                values_from = "est",
                id_cols = c("iv", "term")) %>%
    rename(Level = term,
           Variable = iv) %>%
    select(Variable, Level, Descriptive, Substantive, Personalization, Surrogation, Responsiveness, Justification) %>%
    kableExtra::kable(format = "latex", booktabs = TRUE, escape = TRUE, align = "lllllll", linesep = "", caption = paste0("Correlates of factor scores: ", toupper(country) ," sample, bivariate regression models"), label = paste0("factor_scores_", country,"_bivariate",attn)) %>%
    kableExtra::kable_styling(font_size = 10) %>%
    kableExtra::add_footnote(paste0("Sample size: ",nrow(data),"; The dependent variable in each model is the factor scores for the relevant dimension as estimated from the item batteries we describe in the main body of the paper. Standard errors in parentheses."), notation = "number")
  cat(table_out)
  
  sink()

  
  out <- list()
  
  out$plot <- for_plot %>%
      mutate(dv = factor(dv, levels =  c("Substantive", "Descriptive", "Surrogation", "Justification", "Personalization", "Responsiveness"))) %>%
      mutate(iv = factor(iv, levels = c("Age", "Class", "Education", "Ethnicity", "Gender", "Ideology", "Interest", "Past Vote", "Trust", "Dem. Satis"))) %>%
    mutate(p.value.new = p.adjust(p.value, "BH")) %>%
    mutate(sig = p.value.new <= 0.05,
           sig = ifelse(estimate==0,FALSE, sig),
           sig = ifelse(sig, "<= 0.05", "> 0.05")) %>%
    ggplot(aes(x = estimate, xmin = low, xmax = high, y = term, col = sig)) + 
    geom_errorbarh(height = .01) + 
    geom_point() + theme_bw() + xlab("Estimate") + ylab("") + 
    geom_vline(xintercept = 0, linetype = 2) + facet_grid(iv ~ dv, scales = "free_y") + 
      theme(strip.background = element_blank(), strip.placement = "outside") + 
    scale_color_manual("Adjusted p-value",values = c("black","gray")) +
    theme(legend.position = "bottom") + 
    guides(colour = guide_legend(title.position="top", title.hjust = 0.5)) 

    out$data <- for_plot
    
    return(out)
    
  }

uk_biv <- plot_coefs_bivariate(uk, uk_form, baseline_text_uk, country = "uk")
uk_biv$plot + ggtitle("UK - bivariate") + xlim(c(-1.75, 1.75))
ggsave(paste0("../output/plots/factor_analysis_descriptive/uk_bivariate",attn,"_BH.pdf"), width = 15, height = 9)
uk_biv$plot + aes(col = NULL) + ggtitle("UK - bivariate") + xlim(c(-1.75, 1.75))
ggsave(paste0("../output/plots/factor_analysis_descriptive/uk_bivariate",attn,".pdf"), width = 15, height = 9)


us_biv <- plot_coefs_bivariate(data = us, form = us_form, baseline_text_us, country = "us") 
us_biv$plot + ggtitle("US - bivariate") + xlim(c(-1.75, 1.75))
ggsave(paste0("../output/plots/factor_analysis_descriptive/us_bivariate",attn,"_BH.pdf"), width = 15, height = 9)
us_biv$plot + aes(col = NULL) + ggtitle("US - bivariate") + xlim(c(-1.75, 1.75))
ggsave(paste0("../output/plots/factor_analysis_descriptive/us_bivariate",attn,".pdf"), width = 15, height = 9)



de_biv <- plot_coefs_bivariate(data = de, form = de_form, baseline_text_de, country = "de") 
de_biv$plot + ggtitle("DE - bivariate") + xlim(c(-1.75, 1.75))
ggsave(paste0("../output/plots/factor_analysis_descriptive/de_bivariate",attn,"_BH.pdf"), width = 15, height = 9)
de_biv$plot + aes(col = NULL) + ggtitle("DE - bivariate") + xlim(c(-1.75, 1.75))
ggsave(paste0("../output/plots/factor_analysis_descriptive/de_bivariate",attn,".pdf"), width = 15, height = 9)

save(uk, de, us, file = paste0("../working/survey_data",attn,".Rdata"))

writeLines(as.character(nrow(uk)), con = paste0("../output/useful_numbers/uk",attn,"_sample.txt"))
writeLines(as.character(nrow(de)), con = paste0("../output/useful_numbers/de",attn,"_sample.txt"))
writeLines(as.character(nrow(us)), con = paste0("../output/useful_numbers/us",attn,"_sample.txt"))

## Correlation of preferences between dimensions

labels_dimensions <- c("Substantive", "Descriptive", "Surrogation", "Personalisation", "Responsiveness", "Justification")

wgt_cor <- function(data, weights){
  cor_tmp <- cor(data, use = "pairwise.complete.obs")
  var_names <- rownames(cor_tmp)
  for(i in 1:(length(var_names)-1)){
    for(j in (i+1):(length(var_names))){
      tmp <- na.exclude(cbind(data[,c(var_names[c(i,j)])],weights))
      cor_tmp[i,j] <- cov.wt(tmp[,1:2], wt = tmp$weights, cor = TRUE)$cor[1,2]
      cor_tmp[j,i] <- cor_tmp[i,j]
    }
    
  }
  cor_tmp
}

cor_uk <- wgt_cor(uk[,dvs], weights = uk$wgt)
cor_us <- wgt_cor(us[,dvs], weights = us$wgt)
cor_de <- wgt_cor(de[,dvs], weights = de$wgt)

rownames(cor_uk) <- colnames(cor_uk) <- labels_dimensions
rownames(cor_us) <- colnames(cor_us) <- labels_dimensions
rownames(cor_de) <- colnames(cor_de) <- labels_dimensions

cor_uk_plot <- ggcorrplot(cor_uk, lab = TRUE, type = "lower", colors = c("darkgreen", "white", "purple"), legend.title = "", title = "Correlation between dimensions (UK)", show.diag = TRUE) +
  theme(panel.background = element_rect(fill = "transparent", color = NA),
        plot.background = element_rect(fill = "transparent", color = NA)) 

ggsave(paste0("../output/plots/factor_analysis_descriptive/cor_uk",attn,".png"),cor_uk_plot, width = 6, height = 6, dpi = 400)


cor_us_plot <- ggcorrplot(cor_us, lab = TRUE, type = "lower", colors = c("darkgreen", "white", "purple"), legend.title = "", title = "Correlation between dimensions (US)", show.diag = TRUE) +
  theme(panel.background = element_rect(fill = "transparent", color = NA),
        plot.background = element_rect(fill = "transparent", color = NA)) 

ggsave(paste0("../output/plots/factor_analysis_descriptive/cor_us",attn,".png"),cor_us_plot, width = 6, height = 6, dpi = 400)

cor_de_plot <- ggcorrplot(cor_de, lab = TRUE, type = "lower", colors = c("darkgreen", "white", "purple"), legend.title = "", title = "Correlation between dimensions (DE)", show.diag = TRUE) +
  theme(panel.background = element_rect(fill = "transparent", color = NA),
        plot.background = element_rect(fill = "transparent", color = NA)) 

ggsave(paste0("../output/plots/factor_analysis_descriptive/cor_de",attn,".png"),cor_de_plot, width = 6, height = 6, dpi = 400)

